# Read in the data.
# Three raw column names start with a digit; they are mapped to syntactic
# names. The mapping is defined once and applied to both partitions so the
# train and test tables cannot drift apart.
rawColNames <- c("1stFlrSF", "2ndFlrSF", "3SsnPorch")
cleanColNames <- c("FirstFloorSF", "SecondFloorSF", "ThreeSeasonPorch")
# Training partition, tagged with Partition = "Train"
rawTrain <- fread(file = params$trainPath)[, Partition := "Train"] %>%
  setnames(old = rawColNames, new = cleanColNames)
# Test partition, tagged with Partition = "Test". SalePrice is set to NA for
# every test row (presumably so both partitions share the same columns for the
# row-bind below - confirm against the input files).
rawTest <- fread(file = params$testPath)[, SalePrice := NA][, Partition := "Test"] %>%
  setnames(old = rawColNames, new = cleanColNames)
# Stack the two partitions, matching columns by name rather than by position
rawData <- rbindlist(list(rawTrain, rawTest), use.names = TRUE)
# Named character vector with one class per column. Only the first class is
# kept for a multi-class column, so the result is always a character vector
# (sapply() would silently return a list in that case).
colClasses <- vapply(rawData, function(x) class(x)[1L], character(1))
# Build a DT::datatable widget for printing to HTML.
#
# Arguments:
#   dataSet    - the table to display (passed to DT::datatable as `data`)
#   pageLength - number of rows shown per page (default 10)
#
# Returns the DT::datatable object. The table has column filters on top, the
# "nowrap" class, horizontal scrolling, and HTML escaping turned off.
make_dt_hpc <- function(dataSet, pageLength = 10) {
  # An explicit width/height is only supplied while knitr is rendering;
  # otherwise both are left as NULL
  isKnitting <- isTRUE(getOption("knitr.in.progress"))
  widgetWidth <- if (isKnitting) "100%" else NULL
  widgetHeight <- if (isKnitting) "auto" else NULL
  tableOptions <- list(scrollX = TRUE, pageLength = pageLength)
  DT::datatable(
    data = dataSet,
    filter = "top",
    class = "nowrap",
    escape = FALSE,
    width = widgetWidth,
    height = widgetHeight,
    options = tableOptions
  )
}
# Print the section title
raw_html_header("House Prices Kaggle Competition - EDA", headerLevel = 1, details = FALSE)
# Loop over every column of the raw data except Id, SalePrice and Partition
# (in sorted order), printing tables and graphs for each one. colData is a
# list named by column; each element is the named list returned at the end of
# the function below.
# NOTE(review): raw_html_header() and openDetails are not defined in this
# section and must exist in the calling environment. Every header call inside
# the loop is paired with a cat("</details>"), so raw_html_header() presumably
# opens a <details> element - confirm against its definition.
colData <- sapply(
  sort(setdiff(colnames(rawData), c("Id", "SalePrice", "Partition"))),
  simplify = FALSE,
  FUN = function(colName) {
    # Print a header for the column
    colTitle <- tools::toTitleCase(colName)
    raw_html_header(colTitle, headerLevel = 2, open = openDetails)

    # Determine whether or not knitting is in progress (used to choose how
    # the DT tables are printed)
    knitting <- isTRUE(getOption("knitr.in.progress"))

    # Helper: print a level-3 section that contains a DT table
    show_table <- function(title, tbl) {
      raw_html_header(title, headerLevel = 3)
      if (knitting) cat(knitr::knit_print(tbl)) else print(tbl)
      cat("</details>")
    }
    # Helper: print a level-3 section that contains a plot
    show_plot <- function(title, plt) {
      raw_html_header(title, headerLevel = 3)
      print(if (params$plotly) htmltools::tagList(plt) else plt)
      cat("</details>")
    }

    # Extract the column from the full data and from each partition
    fullCol <- rawData[[colName]]
    trainCol <- rawTrain[[colName]]
    testCol <- rawTest[[colName]]

    # Get the column class, & info on missing values
    colClass <- class(fullCol)
    numMissingFull <- KO::num_missing(fullCol)
    numMissingTrain <- KO::num_missing(trainCol)
    numMissingTest <- KO::num_missing(testCol)
    percMissingFull <- KO::percent_missing(fullCol)
    percMissingTrain <- KO::percent_missing(trainCol)
    percMissingTest <- KO::percent_missing(testCol)

    # Number of distinct training values (drives the binning below), and
    # summary() of the column for each data set
    numDistinctTrain <- n_distinct(trainCol)
    colSummaryFull <- summary(fullCol)
    colSummaryTrain <- summary(trainCol)
    colSummaryTest <- summary(testCol)

    # Create an info table; each summary is flattened into a single
    # "name: value | name: value" string
    infoData <- data.table(
      Dataset = c("Full", "Train", "Test"),
      "Number Missing" = c(numMissingFull, numMissingTrain, numMissingTest),
      "Percent Missing" = c(percMissingFull, percMissingTrain, percMissingTest),
      Summary = vapply(
        list(colSummaryFull, colSummaryTrain, colSummaryTest),
        function(x) paste0(names(x), ": ", x, collapse = " | "),
        character(1)
      )
    )
    infoTable <- make_dt_hpc(infoData) %>%
      DT::formatPercentage(columns = "Percent Missing", digits = 2)

    # Print the info table
    show_table("Column Info", infoTable)

    # Set the number of bins to use: 10 when the training column has more
    # than 20 distinct values, otherwise one bin per distinct value
    manyValues <- numDistinctTrain > 20
    numBins <- if (manyValues) 10 else numDistinctTrain

    # Column names returned by KO::binned_one_way_data & their display names
    binnedOldNames <- c("Bins__", "SalePrice", "Weight__")
    binnedNewNames <- c("Bins", "Sale Price", "Weight")
    # Helper: turn binned data into a formatted DT table showing every bin
    make_binned_table <- function(binnedData) {
      make_dt_hpc(binnedData, pageLength = numBins) %>%
        DT::formatPercentage(columns = "Weight", digits = 2) %>%
        DT::formatRound(columns = "Sale Price", digits = 0)
    }

    # Create the quantile-spaced data & plot. A fresh rawTrain[, .(SalePrice)]
    # is passed to every KO call on purpose, in case the callee modifies its
    # yData argument by reference.
    quantileData <- KO::binned_one_way_data(
      x = trainCol, yData = rawTrain[, .(SalePrice)], bins = numBins
    ) %>%
      setnames(old = binnedOldNames, new = binnedNewNames)
    quantileTable <- make_binned_table(quantileData)
    quantilePlot <- KO::binned_one_way_plot(
      x = trainCol, yData = rawTrain[, .(SalePrice)],
      xlab = colTitle, ylab = "Sale Price", bins = numBins, plotly = params$plotly
    )

    # Create the equally-spaced data & plot (always built, because they are
    # part of the returned list even when they are not printed)
    equalData <- KO::binned_one_way_data(
      x = trainCol, yData = rawTrain[, .(SalePrice)], type = "equal", bins = numBins
    ) %>%
      setnames(old = binnedOldNames, new = binnedNewNames)
    equalTable <- make_binned_table(equalData)
    equalPlot <- KO::binned_one_way_plot(
      x = trainCol, yData = rawTrain[, .(SalePrice)], type = "equal",
      xlab = colTitle, ylab = "Sale Price", bins = numBins, plotly = params$plotly
    )

    # Show the quantile data & plot
    show_table("Quantile-Spaced Data", quantileTable)
    show_plot("Quantile-Spaced Plot", quantilePlot)

    # Show the equally spaced data & plot only for columns with more than 20
    # distinct training values
    if (manyValues) {
      show_table("Equally-Spaced Data", equalTable)
      show_plot("Equally-Spaced Plot", equalPlot)
    }

    # Close the column's details section, & return the info
    cat("</details>")
    dplyr::lst(
      colClass, numMissingFull, numMissingTrain, numMissingTest,
      percMissingFull, percMissingTrain, percMissingTest,
      colSummaryFull, colSummaryTrain, colSummaryTest,
      quantileData, quantilePlot, equalData, equalPlot,
      infoTable, quantileTable, equalTable
    )
  }
)
Alley
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BedroomAbvGr
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BldgType
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtCond
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtExposure
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtFinSF1
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
BsmtFinSF2
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
BsmtFinType1
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtFinType2
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtFullBath
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtHalfBath
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtQual
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
BsmtUnfSF
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
CentralAir
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Condition1
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Condition2
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Electrical
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
EnclosedPorch
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
ExterCond
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Exterior1st
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Exterior2nd
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
ExterQual
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Fence
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
FireplaceQu
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Fireplaces
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
FirstFloorSF
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
Foundation
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
FullBath
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Functional
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
GarageArea
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
GarageCars
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
GarageCond
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
GarageFinish
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
GarageQual
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
GarageType
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
GarageYrBlt
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
GrLivArea
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
HalfBath
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Heating
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
HeatingQC
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
HouseStyle
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
KitchenAbvGr
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
KitchenQual
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
LandContour
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
LandSlope
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
LotArea
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
LotConfig
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
LotFrontage
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
LotShape
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
LowQualFinSF
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
MasVnrArea
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
MasVnrType
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
MiscFeature
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
MiscVal
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
MoSold
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
MSSubClass
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
MSZoning
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Neighborhood
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
OpenPorchSF
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
OverallCond
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
OverallQual
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
PavedDrive
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
PoolArea
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
PoolQC
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
RoofMatl
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
RoofStyle
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
SaleCondition
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
SaleType
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
ScreenPorch
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
SecondFloorSF
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
Street
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
ThreeSeasonPorch
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
TotalBsmtSF
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
TotRmsAbvGrd
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Utilities
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
WoodDeckSF
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
YearBuilt
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
YearRemodAdd
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot
Equally-Spaced Data
Equally-Spaced Plot
YrSold
Column Info
Quantile-Spaced Data
Quantile-Spaced Plot